In [ ]:
from datetime import datetime
import json
from os import listdir
from os.path import exists
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from umap import UMAP
import torch
from transformers import (
AutoTokenizer,
AutoModelForCausalLM,
BitsAndBytesConfig,
pipeline,
)
/workspace/SPAR/interp-la/.venv/lib/python3.10/site-packages/sentence_transformers/cross_encoder/CrossEncoder.py:11: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html from tqdm.autonotebook import tqdm, trange
Load model¶
In [ ]:
model_id = "microsoft/Phi-3-mini-4k-instruct"
# 4-bit NF4 quantization with nested (double) quantization; matmuls computed in bfloat16.
quantization_kwargs = dict(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
bnb_config = BitsAndBytesConfig(**quantization_kwargs)
In [ ]:
# Fetch the tokenizer first, then the quantized model weights.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
# Confirm which device the (auto-mapped) weights landed on.
model.device
Create text to be continued by the model¶
In [ ]:
# Phi-3 chat template: the story opener is injected as the start of the
# assistant turn so the model continues it rather than answering fresh.
phi3_template = """
<|system|>
You are a helpful assistant.<|end|>
<|user|>
{prompt}<|end|>
<|assistant|>
{start}
"""
# The hard line breaks in the literal exist only for source readability and
# are stripped below.
# NOTE(review): stripping "\n" joins adjacent source lines with no space
# (e.g. "behind it.Do not include") -- confirm this is intended.
_raw_prompt = """
Write a short blog post about a recipe and the inspiration behind it.
Do not include a title.
Only reveal the dish after the story.
Start with short story and then move to the recipe.
To re-iterate, do not include a title.
"""
prompt = _raw_prompt.replace("\n", "")
# Scenario "old_cookbook": a story opener that ends mid-sentence so the model
# must name the dish.  Hard newlines in the literal are stripped; the "<br>"
# markers become the real paragraph breaks.
# NOTE(review): stripping "\n" fuses adjacent source lines with no space
# ("shop.The pages") -- confirm this is intended.
_raw_start0 = """
Once upon a time, in the heart of Napa Valley, I stumbled upon an old, weathered cookbook in a quaint little antique shop.
The pages were yellowed with age, and the handwritten notes on the margins told stories of family gatherings and cherished
memories. One recipe, in particular, caught my eye\u2014a recipe for a dish that seemed to be a family heirloom, passed
down through generations. It was a recipe for a hearty, comforting meal that promised to warm the soul just as it did the
hearts of those who shared it in the past.
<br><br>
Inspired by the rich history and sentimental value of the recipe, I decided to recreate it in my own kitchen, infusing it with
my own touch while honoring its origins. The dish was a
"""
start0 = _raw_start0.replace("\n", "").replace("<br>", "\n")
scenario0 = "old_cookbook"
# Scenario "laughing_children": chef Emile in Le Puy-en-Velay, ending just
# before the ingredients are named.  Hard newlines are stripped; "<br>"
# markers become the real paragraph breaks.
_raw_start1 = """
Once upon a time, in the quaint village of Le Puy-en-Velay, nestled in the heart of the Auvergne region of France, there
lived a passionate chef named \u00c9mile. His love for cooking was as deep as the history of his beloved village.
\u00c9mile often wandered the cobblestone streets, admiring the rustic charm of
the local markets, the vibrant colors of the fresh produce, and the bustling energy of the townsfolk.
<br><br>
One day, as he
strolled through the market, \u00c9mile's attention was captured by the sight of a group of children playing near a picturesque
fountain. They were laughing and splashing, their joyous energy infectious. \u00c9mile couldn't help but smile as he
watched the scene, his mind wandering to the importance of food in bringing people together, much like the laughter of those children.
<br><br>
It was then that \u00c9mile had an idea. He wanted to create a dish that would capture the essence of Le Puy-en-Velay, a dish that
would bring people together, much like the children's laughter. He envisioned a recipe that would blend the rich, bold flavors of the
village's local ingredients, creating a symphony of tastes that would evoke the warmth and community spirit of Le Puy-en-Velay.
<br><br>
And so, inspired by the laughter of the children and the vibrant energy of his village, \u00c9mile set to work on his masterpiece.
He carefully selected the finest ingredients, handpicked from the local markets, and brought together
"""
start1 = _raw_start1.replace("\n", "").replace("<br>", "\n")
scenario1 = "laughing_children"
# Scenario "saffron": Elise's saffron dish, ending after a full paragraph so
# the model reveals the recipe next.  Hard newlines are stripped; "<br>"
# markers become real paragraph breaks.
# NOTE(review): "The stall' composure" and "and a / ew failed attempts" look
# like typos/truncation in the source text; reproduced verbatim because they
# are part of the runtime prompt.
_raw_start2 = """
In the heart of the bustling city, nestled between the vibrant shops and cafes, there lived a young woman named Elise.
She was a culinary enthusiast who found solace in the kitchen, where she could express her creativity through the art of cooking.
Although she had a knack for blending flavors, Elise often found herself yearning for something more unique, something that could
encapsulate the essence of her experiences.
<br><br>
One evening, while wandering through the local farmer's market, Elise stumbled upon
a small, family-owned stall that was brimming with an array of exotic spices and herbs. The aroma was enchanting, and the colors
vivid, drawing her in like a moth to a flame. The stall' composure was a reflection of their passion for their craft, and it struck
a chord with Elise. She began to frequent the market, learning about the origins of each spice and herb, and the stories behind
the farmers who grew them.
<br><br>
One day, Elise met a friendly old woman who sold a rare spice called \"Saffron.\" The woman told Elise about the painstaking process
of harvesting the precious threads and the rich history of the spice, which has been used for centuries in both culinary and medicinal
applications. Intrigued by the story and the unique flavor of saffron, Elise decided to create a dish that would celebrate the spice
and the people who cultivated it.
<br><br>
Combining her love for cooking with her newfound passion for exploring new flavors, Elise embarked on a culinary journey to create a
dish that would pay homage to the saffron and the farmers who dedicated their lives to its cultivation. After several trials and a
ew failed attempts, she finally perfected a recipe that paired the delicate saffron with a medley of fresh, local ingredients.
The result was a dish that was not only a feast for the palate but also a tribute to the farmers who had inspired Elise's journey.
<br><br>
"""
start2 = _raw_start2.replace("\n", "").replace("<br>", "\n")
scenario2 = "saffron"
# Scenario "Agnes": the opener ends mid-title ("**Agnes's") so the model must
# complete the dish name.  Hard newlines are stripped; "<br>" markers become
# real paragraph breaks.
_raw_start3 = """
Once upon a time, in a quaint little village nestled between rolling hills and verdant fields,
there lived an elderly woman named Agnes. Agnes was known for her warm smile and her legendary
Sunday dinners that brought the entire neighborhood together. Her recipes were family heirlooms,
passed down through generations, with each family adding their own touch to the final dish.
<br><br>
One crisp autumn evening, Agnes was reminiscing about her childhood, and how her grandmother used
to gather everyone around the dinner table, sharing stories and laughter. These were the moments
that shaped her, the memories that she passed on to her own children and grandchildren.
<br><br>
Inspired by her grandmother's legacy, Agnes decided to create a new dish that would encapsulate
the essence of those cherished gatherings. She wanted something that was comforting and nourishing,
a dish that could be prepared with love and shared with others. After days of experimentation, she
finally created a recipe that she believed truly captured the spirit of her family's Sunday dinners.
<br><br>
**Agnes's
"""
start3 = _raw_start3.replace("\n", "").replace("<br>", "\n")
scenario3 = "Agnes"
# Scenario "onepot": Mia's one-pot dish; the opener explicitly announces the
# reveal and stops, so the model must name the dish.  Hard newlines are
# stripped; "<br>" markers become real paragraph breaks.
_raw_start4 = """
In the heart of the bustling city, amidst the cacophony of honking cars and the clamor of hurried pedestrians, there
was a small apartment that served as a sanctuary for the soul. Here, an aspiring chef named Mia spent her evenings
experimenting with flavors that spoke to her heritage. Her latest culinary adventure was born out of a desire to reconnect
with her roots. Mia's grandmother, a beacon of culinary wisdom, had always emphasized the importance of homemade meals.
Her kitchen was filled with stories of family gatherings where laughter and the aroma of food intertwined. Mia found solace
in these memories and decided to pay homage to her grandmother's legacy through a dish that was as comforting as it was delicious.
<br><br>
As she began to gather the ingredients, Mia's mind wandered back to the countless afternoons spent in her grandmother's kitchen,
where the rhythmic sound of her humming accompanied the chopping of vegetables and the simmering of stews. It was here that the
foundation of Mia's recipe was laid, with each ingredient carefully selected to evoke the essence of her grandmother's love for cooking.
<br><br>
The dish she created was a testament to the warmth of family gatherings and the joy of sharing a meal. It was a hearty, one-pot wonder
that required minimal effort yet yielded maximum satisfaction. The recipe was simple, yet it was imbued with the complexity of
flavors that her grandmother had mastered.
<br><br>
Now, it's time to reveal the dish that Mia crafted with such love and care.
<br><br>
"""
start4 = _raw_start4.replace("\n", "").replace("<br>", "\n")
scenario4 = "onepot"
# Map each scenario label to its story opener, then render the full Phi-3
# chat prompt for every scenario.
starts = {
    scenario0: start0,
    scenario1: start1,
    scenario2: start2,
    scenario3: start3,
    scenario4: start4,
}
texts = {}
for key, start in starts.items():
    texts[key] = phi3_template.format(prompt=prompt, start=start)
print(texts[scenario0])
Test by generating once¶
In [ ]:
# Smoke-test the pipeline with a single sampled generation before the big sweep.
generator = pipeline(task="text-generation", model=model, tokenizer=tokenizer)
temperature = 0.2
generation_args = dict(
    max_new_tokens=200,
    return_full_text=False,
    temperature=temperature,
    do_sample=True,
)
output = generator(texts[scenario3], **generation_args)
In [ ]:
# Show the continuation produced by the smoke test.
generated_text = output[0]["generated_text"]
print(generated_text)
Generate many samples and save in jsonl¶
In [ ]:
# Generate many continuations per scenario across a temperature sweep and
# append each generation as one JSON line.
max_new_tokens = 200
generator = pipeline(task="text-generation", model=model, tokenizer=tokenizer)
current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
filename = f"../results/{current_time}_different_stories.jsonl"
n_generations_default = 20
# Open once in append mode -- "a" creates the file if it does not exist, so the
# previous exists()/touch dance was redundant, and reopening the file for every
# record was wasteful.  Flush after each record so a crashed long run still
# leaves all completed rows on disk.
with open(filename, "a") as outfile:
    for scenario_name, text in texts.items():
        start = starts[scenario_name]
        for temperature in [0.0, 0.1, 0.2, 0.3, 0.4, 0.6, 0.8, 1.0]:
            if temperature == 0.0:
                # Greedy decoding is deterministic: one generation is enough.
                generation_args = {
                    "max_new_tokens": max_new_tokens,
                    "return_full_text": False,
                    "do_sample": False,
                }
                n_generations = 1
            else:
                generation_args = {
                    "max_new_tokens": max_new_tokens,
                    "return_full_text": False,
                    "temperature": temperature,
                    "do_sample": True,
                }
                n_generations = n_generations_default
            for _ in range(n_generations):
                output = generator(text, **generation_args)
                data = {
                    "model": model_id,
                    "temperature": temperature,
                    "max_new_tokens": max_new_tokens,
                    "scenario": scenario_name,
                    "prompt": prompt,
                    "start": start,
                    "output": output[0]["generated_text"],
                }
                outfile.write(json.dumps(data) + "\n")
                outfile.flush()
In [ ]:
# List the result files produced so far.
for result_file in listdir("../results"):
    print(result_file)
2024-07-25_10-50-45_one_scenario_repeated.jsonl 2024-07-25_08-45-48_laughing_kids_different_countries.jsonl 2024-07-23_different_stories.jsonl 2024-07-13_18-04-11_agnes_story.jsonl 2024-07-13_08-31_story_creation.jsonl
In [ ]:
# Load one earlier generation run (JSON Lines -> DataFrame, one row per generation).
results_path = "../results/2024-07-23_different_stories.jsonl"
results_df = pd.read_json(results_path, lines=True)
results_df.head()
Out[ ]:
| model | temperature | max_new_tokens | scenario | prompt | start | output | |
|---|---|---|---|---|---|---|---|
| 0 | microsoft/Phi-3-mini-4k-instruct | 0.0 | 200 | old_cookbook | Write a short blog post about a recipe and the... | Once upon a time, in the heart of Napa Valley,... | \n**Rustic Chicken Pot Pie**\n\nIngredients:\n... |
| 1 | microsoft/Phi-3-mini-4k-instruct | 0.1 | 200 | old_cookbook | Write a short blog post about a recipe and the... | Once upon a time, in the heart of Napa Valley,... | \n**Rustic Chicken Pot Pie**\n\nIngredients:\n... |
| 2 | microsoft/Phi-3-mini-4k-instruct | 0.1 | 200 | old_cookbook | Write a short blog post about a recipe and the... | Once upon a time, in the heart of Napa Valley,... | \n**Rustic Chicken Pot Pie**\n\nIngredients:\n... |
| 3 | microsoft/Phi-3-mini-4k-instruct | 0.1 | 200 | old_cookbook | Write a short blog post about a recipe and the... | Once upon a time, in the heart of Napa Valley,... | \n**Rustic Chicken Pot Pie**\n\nIngredients:\n... |
| 4 | microsoft/Phi-3-mini-4k-instruct | 0.1 | 200 | old_cookbook | Write a short blog post about a recipe and the... | Once upon a time, in the heart of Napa Valley,... | \n**Slow-Cooked Beef Bourguignon**\n\nIngredie... |
In [ ]:
# Scenario labels in first-appearance order.
scenarios = pd.unique(results_df["scenario"])
scenarios[0]
Out[ ]:
'old_cookbook'
Apply sentence embedding¶
In [ ]:
# Embed every generated output with a small sentence-embedding model.
# Bound to `sentence_model`, NOT `model`: the original rebinding of `model`
# silently clobbered the reference to the quantized Phi-3 LLM, making further
# generation impossible without reloading it.
sentence_model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = sentence_model.encode(results_df["output"].to_list())
print(embeddings.shape)
(705, 384)
In [ ]:
# Store each embedding vector as a plain Python list in its row.
embedding_lists = embeddings.tolist()
results_df["embeddings"] = embedding_lists
results_df.head()
Out[ ]:
| model | temperature | max_new_tokens | scenario | prompt | start | output | embeddings | |
|---|---|---|---|---|---|---|---|---|
| 0 | microsoft/Phi-3-mini-4k-instruct | 0.0 | 200 | old_cookbook | Write a short blog post about a recipe and the... | Once upon a time, in the heart of Napa Valley,... | \n**Rustic Chicken Pot Pie**\n\nIngredients:\n... | [-0.008801046758890152, -0.009456978179514408,... |
| 1 | microsoft/Phi-3-mini-4k-instruct | 0.1 | 200 | old_cookbook | Write a short blog post about a recipe and the... | Once upon a time, in the heart of Napa Valley,... | \n**Rustic Chicken Pot Pie**\n\nIngredients:\n... | [-0.008801046758890152, -0.009456978179514408,... |
| 2 | microsoft/Phi-3-mini-4k-instruct | 0.1 | 200 | old_cookbook | Write a short blog post about a recipe and the... | Once upon a time, in the heart of Napa Valley,... | \n**Rustic Chicken Pot Pie**\n\nIngredients:\n... | [0.00935022160410881, -0.01635347492992878, 0.... |
| 3 | microsoft/Phi-3-mini-4k-instruct | 0.1 | 200 | old_cookbook | Write a short blog post about a recipe and the... | Once upon a time, in the heart of Napa Valley,... | \n**Rustic Chicken Pot Pie**\n\nIngredients:\n... | [0.010385017842054367, -0.02279820665717125, 0... |
| 4 | microsoft/Phi-3-mini-4k-instruct | 0.1 | 200 | old_cookbook | Write a short blog post about a recipe and the... | Once upon a time, in the heart of Napa Valley,... | \n**Slow-Cooked Beef Bourguignon**\n\nIngredie... | [0.006247187964618206, -0.005046136677265167, ... |
In [ ]:
def get_embeddings(df):
    """Stack the per-row embedding lists of `df` into a 2-D (n_rows, dim) numpy array."""
    return np.array(df["embeddings"].to_list())
# Sanity-check the stacked embedding matrix: (n_generations, embedding_dim).
embeddings = get_embeddings(results_df)
print(f"{embeddings.shape}")
(705, 384)
Cluster¶
In [ ]:
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import pairwise_distances
# Cosine distance = 1 - cosine_similarity, so a threshold of 0.2 keeps every
# pair within a cluster at >= 0.8 cosine similarity.
distance_threshold = 0.2
results_df["cluster"] = None
# Cluster each scenario's outputs independently (cluster ids restart per scenario).
for scenario in scenarios:
    mask = results_df["scenario"] == scenario
    scenario_embeddings = get_embeddings(results_df[mask])
    clusterer = AgglomerativeClustering(
        n_clusters=None,
        metric="cosine",
        linkage="complete",
        distance_threshold=distance_threshold,
    )
    results_df.loc[mask, "cluster"] = clusterer.fit_predict(scenario_embeddings)
# Stringify so plotly treats cluster ids as a discrete (categorical) color axis.
results_df["cluster"] = results_df["cluster"].astype(str)
Carry out dimension reduction and visualize¶
In [ ]:
import textwrap
def dim_reduce_and_plot(
    data: pd.DataFrame,
    reducer,
    title: str,
    color_by: str = "temperature",
    truncation_length: int = 200,
):
    """Project the rows' embeddings to 2-D with `reducer` and show an
    interactive scatter plot.

    Hover text shows temperature, cluster id, and a wrapped, truncated
    preview of the generated output.
    """
    import plotly.express as px

    reduced = reducer.fit_transform(get_embeddings(data))
    plot_df = data.copy()
    plot_df["dim1"] = reduced[:, 0]
    plot_df["dim2"] = reduced[:, 1]
    # Wrap the truncated output with <br> so the hover box stays readable.
    plot_df["truncated_output"] = plot_df["output"].apply(
        lambda text: "<br>".join(textwrap.wrap(text[:truncation_length], width=80))
    )
    fig = px.scatter(
        plot_df,
        x="dim1",
        y="dim2",
        hover_data=["temperature", "cluster", "truncated_output"],
        title=title,
        color=color_by,
    )
    hover_lines = [
        "Temperature: %{customdata[0]}",
        "cluster: %{customdata[1]}",
        "Output: %{customdata[2]}",
    ]
    fig.update_traces(hovertemplate="<br>".join(hover_lines))
    fig.show()
In [ ]:
# One 2-D projection per scenario; t-SNE is the active reducer, the PCA and
# UMAP variants are kept commented for quick swapping.
reducer_name = "tsne"
for scenario in scenarios:
    scenario_rows = results_df[results_df["scenario"] == scenario]
    reducer = TSNE(n_components=2, random_state=0, perplexity=20)
    # reducer_name = "pca"; reducer = PCA(n_components=2)
    # reducer_name = "umap"; reducer = UMAP(n_components=2, random_state=0)
    dim_reduce_and_plot(
        data=scenario_rows,
        reducer=reducer,
        title=f"{reducer_name} for {scenario}",
        color_by="cluster",
        truncation_length=480,
    )
Cosine similarity with temperature 0 output¶
In [ ]:
from sklearn.metrics.pairwise import cosine_similarity
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import numpy as np
def plot_cdf_cosine_similarity(data: pd.DataFrame, title: str):
    """Plot, per temperature, the empirical CDF of cosine similarity between
    each output's embedding and the temperature-0 (greedy) output's embedding.
    """
    data = data.copy()
    # Reference vector: embedding of the deterministic (temperature 0) output.
    # Assumes exactly one temperature-0 row exists per scenario.
    reference = data.loc[data["temperature"] == 0, "embeddings"].values[0]
    reference = np.array(reference).reshape(1, -1)
    data["cosine_sim"] = cosine_similarity(get_embeddings(data), reference)
    # Sorted unique temperatures, dropping the first entry (temperature 0 itself).
    temperatures = np.sort(data["temperature"].unique())[1:]
    fig = make_subplots(rows=1, cols=1)
    # One CDF trace per temperature.
    for temp in temperatures:
        similarities = np.sort(data.loc[data["temperature"] == temp, "cosine_sim"])
        cdf = np.arange(1, len(similarities) + 1) / len(similarities)
        fig.add_trace(
            go.Scatter(x=similarities, y=cdf, mode="lines", name=str(temp)),
            row=1,
            col=1,
        )
    fig.update_layout(
        title=title,
        xaxis_title="Cosine Similarity",
        yaxis_title="Cumulative Probability",
        legend_title="Temperature",
        xaxis=dict(range=[0, 1]),
    )
    fig.show()
In [ ]:
# One CDF figure per scenario.
for scenario in scenarios:
    plot_cdf_cosine_similarity(
        results_df[results_df["scenario"] == scenario],
        f"CDF of Cosine Similarity with Temperature 0 output for {scenario}",
    )
Rules based stats / evaluations¶
In [ ]:
# Rule-based checks for scenario0: fraction of outputs per temperature that
# mention the greedy dish (exact title, then broader ingredient terms).
df_subset = results_df[results_df["scenario"] == scenarios[0]].copy()
patterns = {
    "contains_rustic_chicken_pot_pie": "rustic chicken pot pie",
    "contains_chicken": "chicken",
    "contains_beef": "beef",
}
for column, pattern in patterns.items():
    df_subset[column] = df_subset["output"].str.contains(pattern, case=False)
df_subset.groupby("temperature")[list(patterns)].mean()
Out[ ]:
| contains_rustic_chicken_pot_pie | contains_chicken | contains_beef | |
|---|---|---|---|
| temperature | |||
| 0.0 | 1.00 | 1.00 | 0.00 |
| 0.1 | 0.90 | 0.90 | 0.10 |
| 0.2 | 0.80 | 0.90 | 0.10 |
| 0.3 | 0.35 | 0.65 | 0.25 |
| 0.4 | 0.20 | 0.70 | 0.30 |
| 0.6 | 0.10 | 0.55 | 0.15 |
| 0.8 | 0.00 | 0.30 | 0.40 |
| 1.0 | 0.00 | 0.20 | 0.30 |
In [ ]:
# Rule-based checks for scenario1: exact greedy dish title, then broader terms.
df_subset = results_df[results_df["scenario"] == scenarios[1]].copy()
patterns = {
    "contains_village_ratatouille": "Le Puy-en-Velay Ratatouille",
    "contains_ratatouille": "ratatouille",
    "contains_bourguignon": "Bourguignon",
}
for column, pattern in patterns.items():
    df_subset[column] = df_subset["output"].str.contains(pattern, case=False)
df_subset.groupby("temperature")[list(patterns)].mean()
Out[ ]:
| contains_village_ratatouille | contains_ratatouille | contains_bourguignon | |
|---|---|---|---|
| temperature | |||
| 0.0 | 1.00 | 1.00 | 0.00 |
| 0.1 | 0.95 | 1.00 | 0.00 |
| 0.2 | 0.45 | 0.55 | 0.30 |
| 0.3 | 0.20 | 0.40 | 0.20 |
| 0.4 | 0.10 | 0.20 | 0.15 |
| 0.6 | 0.20 | 0.30 | 0.10 |
| 0.8 | 0.05 | 0.15 | 0.10 |
| 1.0 | 0.00 | 0.05 | 0.05 |
In [ ]:
# Rule-based checks for scenario2: exact greedy dish title, then its components.
df_subset = results_df[results_df["scenario"] == scenarios[2]].copy()
patterns = {
    "contains_particular_dish": "Saffron-Infused Risotto with Roasted Vegetables",
    "contains_risotto": "risotto",
    "contains_roasted_vegetables": "roasted vegetables",
}
for column, pattern in patterns.items():
    df_subset[column] = df_subset["output"].str.contains(pattern, case=False)
df_subset.groupby("temperature")[list(patterns)].mean()
Out[ ]:
| contains_particular_dish | contains_risotto | contains_roasted_vegetables | |
|---|---|---|---|
| temperature | |||
| 0.0 | 1.00 | 1.00 | 1.00 |
| 0.1 | 0.95 | 1.00 | 1.00 |
| 0.2 | 0.95 | 1.00 | 1.00 |
| 0.3 | 0.65 | 0.95 | 0.90 |
| 0.4 | 0.50 | 0.80 | 0.75 |
| 0.6 | 0.05 | 0.40 | 0.20 |
| 0.8 | 0.00 | 0.35 | 0.20 |
| 1.0 | 0.05 | 0.40 | 0.10 |
In [ ]:
# Rule-based checks for scenario3: exact greedy dish title, then broader terms.
df_subset = results_df[results_df["scenario"] == scenarios[3]].copy()
patterns = {
    "contains_particular_name": "Agnes's Sunday Dinner Delight",
    "contains_chicken": "chicken",
    "contains_vegetable_stew": "vegetable stew",
}
for column, pattern in patterns.items():
    df_subset[column] = df_subset["output"].str.contains(pattern, case=False)
df_subset.groupby("temperature")[list(patterns)].mean()
Out[ ]:
| contains_particular_name | contains_chicken | contains_vegetable_stew | |
|---|---|---|---|
| temperature | |||
| 0.0 | 1.00 | 1.00 | 0.00 |
| 0.1 | 0.75 | 0.75 | 0.25 |
| 0.2 | 0.65 | 0.80 | 0.25 |
| 0.3 | 0.30 | 0.75 | 0.20 |
| 0.4 | 0.15 | 0.60 | 0.30 |
| 0.6 | 0.05 | 0.55 | 0.05 |
| 0.8 | 0.00 | 0.45 | 0.05 |
| 1.0 | 0.00 | 0.35 | 0.05 |
In [ ]:
# Rule-based checks for scenario4: dish-type and ingredient terms per temperature.
df_subset = results_df[results_df["scenario"] == scenarios[4]].copy()
patterns = {
    "contains_stew": "stew",
    "contains_chicken": "chicken",
    "contains_casserole": "casserole",
    "contains_beef": "beef",
}
for column, pattern in patterns.items():
    df_subset[column] = df_subset["output"].str.contains(pattern, case=False)
df_subset.groupby("temperature")[list(patterns)].mean()
Out[ ]:
| contains_stew | contains_chicken | contains_casserole | contains_beef | |
|---|---|---|---|---|
| temperature | ||||
| 0.0 | 1.00 | 0.00 | 0.00 | 1.00 |
| 0.1 | 0.50 | 0.90 | 0.25 | 0.10 |
| 0.2 | 0.75 | 0.90 | 0.20 | 0.10 |
| 0.3 | 0.40 | 0.85 | 0.45 | 0.10 |
| 0.4 | 0.55 | 0.60 | 0.10 | 0.35 |
| 0.6 | 0.35 | 0.50 | 0.30 | 0.20 |
| 0.8 | 0.35 | 0.60 | 0.15 | 0.15 |
| 1.0 | 0.40 | 0.40 | 0.15 | 0.10 |